library(ggplot2)ggplot(data, aes(x, y, group, ...)) + geom_obj(...) + modify()qplotdata.frame
geom_bar(), geom_line(), geom_point(), …ggtitle(), scale_x_discrete(), theme_minimal()…ggplot(data=diamonds, aes(x=cut)) + geom_bar()
ggplot's behaviorggplot's behavior. Twice.str or class) before calling ggplotdiamondsstr(diamonds)
## 'data.frame': 53940 obs. of 10 variables: ## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ... ## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ... ## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ... ## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ... ## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ... ## $ table : num 55 61 65 58 58 57 57 55 61 61 ... ## $ price : int 326 326 327 334 335 336 336 337 337 338 ... ## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ... ## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ... ## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
head(diamonds)
## carat cut color clarity depth table price x y z ## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 ## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 ## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 ## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63 ## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 ## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
factorggplot(data=diamonds, aes(x=cut)) + geom_bar()
ggp <- ggplot(data=diamonds[1:5,], aes(x=cut)) + geom_bar() ggp
ggp <- ggplot(data=diamonds[1:5,], aes(x=cut)) + geom_bar() ggp + scale_x_discrete(drop=FALSE) # ?scale_x_discrete
ggplot(data=diamonds, aes(x=cut)) + geom_bar() + coord_flip()
ggp <- ggp + xlab("Cut") + ylab("Count") + ggtitle("Hello ggplot!")
ggp
# want to customize colors? refer to: www.cookbook-r.com/Graphs/Colors_(ggplot2)/ ggp + geom_bar(fill="snow", color="black") # see colors() if you're picky
diamonds_precounted <- as.data.frame(table(diamonds$cut, dnn=c("Cut")))
diamonds_precounted
## Cut Freq ## 1 Fair 1610 ## 2 Good 4906 ## 3 Very Good 12082 ## 4 Premium 13791 ## 5 Ideal 21551
ggplot(data=diamonds_precounted, aes(x=Cut)) + geom_bar() # epic FAILggplot(data=diamonds_precounted, aes(x=Cut)) + geom_bar()
ggplot(diamonds_precounted, aes(x=Cut, y=Freq)) + geom_bar(stat="identity") # default is "bin"
stat="identity"stat="bin"diamonds[1:5,]
## carat cut color clarity depth table price x y z ## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 ## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 ## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 ## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63 ## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
ggplot(diamonds[1:5,], aes(x=cut, y=depth)) + geom_bar(stat="identity")
ggplot(diamonds_precounted, aes(x=reorder(Cut, -Freq), y=Freq)) + geom_bar(stat='identity') # The order is determined by factor levels
fillggplot(data=diamonds, aes(x=color, fill=cut)) + geom_bar()
colorggplot(data=diamonds, aes(x=color, color=cut)) + geom_bar()
ggplot(data=diamonds, aes(x=color, fill=cut)) + geom_bar(position="dodge")
ggplot(data=diamonds, aes(x=price)) + geom_bar()
ggplot(data=diamonds, aes(x=price, fill=cut)) + geom_bar(position="stack")
ggplot(data=diamonds, aes(x=price, fill=cut)) + geom_bar(position="identity", alpha=.5)
ggplot(data=diamonds, aes(x=price, fill=cut)) + geom_density(position="identity", alpha=.5)
ggplot(data=diamonds[diamonds$cut %in% c("Fair", "Ideal"),], aes(x=price, fill=cut)) +
geom_density(position="identity", alpha=.5) +
geom_bar(position="identity", alpha=.5)
ggplot(data=diamonds[diamonds$cut %in% c("Fair", "Ideal"),], aes(x=price, y=..density.., fill=cut)) +
geom_density(position="identity", alpha=.5) +
geom_bar(position="identity", alpha=.5)
library(ggplot2)
source("./R/load-all.R")
dat[dat$Precp == 'T', "Precp"] <- '0'
dat$Precp <- as.numeric(dat$Precp)
ggplot(dat, aes(x=city, y=PrecpHour)) + geom_bar(stat="identity")
ggplot(dat, aes(x=city, y=Precp)) + geom_bar(stat="identity")
ggplot(dat, aes(x=city, y=SunShine)) + geom_bar(stat="identity")
dd <- by(dat, dat$city,
function(x) c(rainyHour=sum(x$PrecpHour), sunnyHour=sum(x$SunShine)))
dd <- as.data.frame(do.call(rbind, dd))
dd$city <- rownames(dd)
dd <- reshape(dd, direction="long", varying=c("rainyHour", "sunnyHour"),
idvar="city", v.names="hour", times=c("rainyHour", "sunnyHour"), timevar="status")
ggplot(dd, aes(x=city, y=hour, fill=status)) + geom_bar(stat="identity")
library(ggplot2)
library(data.table) # need 1.9.5 => devtools:install_github("Rdatatable/data.table", build_vignettes=FALSE)
src_path <- "./data"
citynames <- unique(gsub("_.*", '', dir(src_path)))
citynames <- citynames[1:4] # take english version only
dat <- list()
for ( city in citynames )
dat[[city]] <- rbindlist(lapply(grep(city, dir(src_path, full.names=TRUE), value=TRUE), fread))
dat <- rbindlist(dat, idcol="city")
dat[Precp == 'T', Precp:='0']
dat[, Precp:=as.numeric(Precp)]
for ( var in c("PrecpHour", "Precp", "SunShine") )
print(ggplot(dat, aes_string(x="city", y=var)) + geom_bar(stat="identity"))
dd <- melt(dat[, list(PrecpHour=sum(PrecpHour), SunShine=sum(SunShine)), by="city"],
id.vars="city", measure.vars=c("PrecpHour", "SunShine"),
variable.name="status", value.name="hour")
ggplot(dd, aes(x=city, y=hour, fill=status)) + geom_bar(stat="identity")
stat="identity"## color cut cnt pct ## 1: E Ideal 3903 0.39838726 ## 2: E Premium 2337 0.23854241 ## 3: E Good 933 0.09523323 ## 4: I Premium 1428 0.26337145 ## 5: J Good 307 0.10933048 ## 6: J Very Good 678 0.24145299 ## 7: I Very Good 1204 0.22205828 ## 8: H Very Good 1824 0.21965318 ## 9: E Fair 224 0.02286414 ## 10: J Ideal 896 0.31908832 ## 11: F Premium 2331 0.24428841 ## 12: I Ideal 2093 0.38601992 ## 13: I Good 522 0.09627444 ## 14: E Very Good 2400 0.24497295 ## 15: G Very Good 2299 0.20359547 ## 16: D Very Good 1513 0.22332103 ## 17: F Very Good 2164 0.22678684 ## 18: F Good 909 0.09526305 ## 19: H Good 702 0.08453757 ## 20: D Good 662 0.09771218
aggregate and lapplydiamonds$cnt <- 0L
diamondsDF <- aggregate(data=diamonds, cnt ~ color + cut, FUN=length)
diamondsDF <- do.call(rbind, lapply(split(diamondsDF, diamondsDF$color),
function(x) within(x, pct <- cnt / sum(cnt))))
head(diamondsDF)
## color cut cnt pct ## D.1 D Fair 163 0.02405904 ## D.8 D Good 662 0.09771218 ## D.15 D Very Good 1513 0.22332103 ## D.22 D Premium 1603 0.23660517 ## D.29 D Ideal 2834 0.41830258 ## E.2 E Fair 224 0.02286414
table and byDF <- as.data.frame(table(diamonds$color, diamonds$cut, dnn=list("color", "cut")))
DF <- do.call(rbind, by(DF, DF$color,
function(x) within(x, pct <- Freq / sum(Freq))))
head(DF)
## color cut Freq pct ## D.1 D Fair 163 0.02405904 ## D.8 D Good 662 0.09771218 ## D.15 D Very Good 1513 0.22332103 ## D.22 D Premium 1603 0.23660517 ## D.29 D Ideal 2834 0.41830258 ## E.2 E Fair 224 0.02286414
avediamonds$cnt <- ave(rep(1, nrow(diamonds)), diamonds$color, diamonds$cut, FUN=sum)
diamonds$cnt_by_color <- ave(rep(1, nrow(diamonds)), diamonds$color, FUN=sum)
diamonds <- transform(diamonds, pct=cnt / cnt_by_color)
DFave <- unique(diamonds[,c("color", "cut", "cnt", "pct")])
head(DFave)
## color cut cnt pct ## 1 E Ideal 3903 0.39838726 ## 2 E Premium 2337 0.23854241 ## 3 E Good 933 0.09523323 ## 4 I Premium 1428 0.26337145 ## 5 J Good 307 0.10933048 ## 6 J Very Good 678 0.24145299
data.tablelibrary(data.table) diamondsDT <- as.data.table(diamonds) diamondsDT <- diamondsDT[, list(cnt=.N), by="color,cut"] diamondsDT <- diamondsDT[, pct:=cnt / sum(cnt), by="color"] head(diamondsDT)
## color cut cnt pct ## 1: E Ideal 3903 0.39838726 ## 2: E Premium 2337 0.23854241 ## 3: E Good 933 0.09523323 ## 4: I Premium 1428 0.26337145 ## 5: J Good 307 0.10933048 ## 6: J Very Good 678 0.24145299
dplyrlibrary(dplyr) DP <- summarise(group_by(diamonds, color, cut), cnt=n()) DP <- mutate(DP, pct=cnt / sum(cnt)) head(DP)
## Source: local data frame [6 x 4] ## Groups: color ## ## color cut cnt pct ## 1 D Fair 163 0.02405904 ## 2 D Good 662 0.09771218 ## 3 D Very Good 1513 0.22332103 ## 4 D Premium 1603 0.23660517 ## 5 D Ideal 2834 0.41830258 ## 6 E Fair 224 0.02286414
# options(gsubfn.engine='R') # try this line should you have trouble loading the package
library(sqldf)
tmp1 <- sqldf('select color, cut, count(*) as cnt from diamonds group by color, cut')
tmp2 <- sqldf('select color, sum(cnt) as cnt_by_color from tmp1 group by color')
DFSQL <- sqldf('select tmp1.color, cut, cnt, (cnt*1.0 / cnt_by_color) as pct
from tmp1 join tmp2 on tmp1.color = tmp2.color')
head(DFSQL)
## color cut cnt pct ## 1 D Fair 163 0.02405904 ## 2 D Good 662 0.09771218 ## 3 D Ideal 2834 0.41830258 ## 4 D Premium 1603 0.23660517 ## 5 D Very Good 1513 0.22332103 ## 6 E Fair 224 0.02286414
library(ggplot2)
dd2 <- do.call(rbind, by(dd, dd$city,
function(x) within(x, pct <- hour / sum(hour))))
ggplot(dd2, aes(x=city, y=pct, fill=status)) + geom_bar(stat="identity")
# not meaningful but plottable ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line(size=1.5)
ggplot2ggtitle('yor title') is the same as labs(title='your title')?labs for its equivalent callsgeom_vline is the sibling of geom_ablinetheme_bw is a special version of theme
theme_greyhead(WorldPhones) str(WorldPhones)
## N.Amer Europe Asia S.Amer Oceania Africa Mid.Amer ## 1951 45939 21574 2876 1815 1646 89 555 ## 1956 60423 29990 4708 2568 2366 1411 733 ## 1957 64721 32510 5230 2695 2526 1546 773 ## 1958 68484 35218 6662 2845 2691 1663 836 ## 1959 71799 37598 6856 3000 2868 1769 911 ## 1960 76036 40341 8220 3145 3054 1905 1008 ## num [1:7, 1:7] 45939 60423 64721 68484 71799 ... ## - attr(*, "dimnames")=List of 2 ## ..$ : chr [1:7] "1951" "1956" "1957" "1958" ... ## ..$ : chr [1:7] "N.Amer" "Europe" "Asia" "S.Amer" ...
ggplot: data.frame only, please!ggplot(WorldPhones, aes(x=rownames(WorldPhones), y=Asia)) + geom_line() ## Error: ggplot2 doesn't know how to deal with data of class matrix
ggplot eat only data.framesWorldPhones.DF <- as.data.frame(WorldPhones) WorldPhones.DF$year <- rownames(WorldPhones.DF) class(WorldPhones.DF) # this time we should be fine!
## [1] "data.frame"
ggplot(WorldPhones.DF, aes(x=year, y=Asia)) + geom_line(size=1.5)
ggplot(WorldPhones.DF, aes(x=as.numeric(year), y=Asia)) + geom_line(size=1.5)
Remember? Categorical x at default will not show null data.
ggplot2Wide format
## N.Amer Europe Asia year ## 1951 45939 21574 2876 1951 ## 1956 60423 29990 4708 1956 ## 1957 64721 32510 5230 1957 ## 1958 68484 35218 6662 1958 ## 1959 71799 37598 6856 1959 ## 1960 76036 40341 8220 1960 ## 1961 79831 43173 9053 1961
Long format
## Value Region Year ## 1 45939 N.Amer 1951 ## 2 60423 N.Amer 1956 ## 3 64721 N.Amer 1957 ## 4 68484 N.Amer 1958 ## 5 71799 N.Amer 1959 ## 6 76036 N.Amer 1960 ## 7 79831 N.Amer 1961 ## 8 21574 Europe 1951
Wide format
WP=WorldPhones.DF[,c(1:3,8)] head(WP,8)
## N.Amer Europe Asia year ## 1951 45939 21574 2876 1951 ## 1956 60423 29990 4708 1956 ## 1957 64721 32510 5230 1957 ## 1958 68484 35218 6662 1958 ## 1959 71799 37598 6856 1959 ## 1960 76036 40341 8220 1960 ## 1961 79831 43173 9053 1961
Long format (with melt)
WP.long=melt(WP,id.vars='year')
# 以melt將資料轉換成Long Format,以id.vars保留欄位
colnames(WP.long)=
c('Year','Region','Value')
head(WP.long)
## Year Region Value ## 1 1951 N.Amer 45939 ## 2 1956 N.Amer 60423 ## 3 1957 N.Amer 64721 ## 4 1958 N.Amer 68484 ## 5 1959 N.Amer 71799 ## 6 1960 N.Amer 76036
WP.long$Year <- as.integer(as.character(WP.long$Year)) ggplot(WP.long, aes(x=Year, y=Value, color=Region)) + geom_line(size=1.5)
ggplot(WP.long, aes(x=Year, y=Value, linetype=Region))+ geom_line(size=1.5)
ggplot(WP.long,aes(x=factor(Year), y=Value))+ geom_line(aes(linetype=Region,group=Region),size=1.5)
ggplot(WP.long, aes(x=Year, y=Value, linetype=Region)) + geom_line(size=1.5) + guides(linetype=guide_legend(reverse=TRUE))
load("rdata/for_exercise.RData") #load sample data named dat
dat$date = as.Date(substr(dat$T.Max.Time, 1, 10)) # generate date
dat$city = factor(dat$city) # set city as actor
ggplot(dat,aes(x=date,y=RH))+
geom_line(aes(group=city,colour=city),size=1.5)+
# adjust the angle of date label
theme(axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.5),
text=element_text(size=20))
movies1 <- movies[!is.na(movies$budget),] ggplot(movies1, aes(x=budget, y=rating)) + geom_point()
ggplot(movies1, aes(x=budget, y=rating)) + geom_point(shape=5, size=3)
ggplot2This usually happens accidentally.
ggplot(movies1, aes(x=budget, y=rating, color=Action)) + geom_point()
ggplot(movies1, aes(x=budget, y=rating, color=factor(Action))) +
geom_point() + labs(color='Action Movie?')
ggplot(movies1, aes(x=budget, y=rating, color=factor(Action), shape=(length > 120))) +
geom_point(size=3) + labs(color='Action Movie?')
ggplot(movies, aes(x=votes, y=rating)) + geom_point() +
stat_smooth(method=lm, level=.95) # add se=FALSE to disable CI
ggplot(movies, aes(x=votes, y=rating)) + geom_point() + stat_smooth()
ggplot(movies1, aes(x=budget, y=rating, color=factor(Action))) +
geom_point() + labs(color='Action Movie?') + stat_smooth(method=lm, se=FALSE)
lm_model <- lm(rating ~ budget, data=movies1)
ggplot(movies1, aes(x=budget, y=rating)) + geom_point() +
geom_line(aes(x=budget, y=lm_model$fitted.values), color='blue')
geom_textstarmovies <- movies[movies$votes > mean(movies$votes),] starmovies <- starmovies[order(-starmovies$rating),][1:10,] ggplot(starmovies, aes(x=votes, y=rating)) + geom_point() + geom_text(aes(label=title))
ggplot(starmovies, aes(x=votes, y=rating)) + geom_point(color='red') +
geom_text(aes(label=title), hjust=0, vjust=0, angle=20) +
xlim(0, max(starmovies$votes)*2) +
ylim(min(starmovies$rating), 9.2)
We only choose the movies with single type to simplify the question.
movietype <- colnames(movies)[18:24] movies1_singletype <- movies1[rowSums(movies1[, movietype]) == 1,] # remove multi-typed movietype_alt <- c(movietype[length(movietype)], movietype[-length(movietype)]) # convert multiple dummies into one factor as grouping var # a little matrix operation will do the trick dummies <- as.matrix(movies1_singletype[, movietype_alt]) movies1_singletype$Type <- factor(dummies %*% (1:length(movietype_alt)), labels=movietype_alt) # Compute the Average Budget of Each Type tapply(movies1_singletype$budget, movies1_singletype$Type, mean)
## Short Action Animation Comedy Drama Documentary ## 396133.1 32698189.6 32311451.6 11921970.6 10456690.5 729704.8 ## Romance ## 5603688.0
The first factor level of movietype, Short, is represented as the intercept term.
lmfit <- lm(as.formula("budget ~ Type"), movies1_singletype)
summary(lmfit)$coef
## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 396133.1 1715935 0.2308556 8.174455e-01 ## TypeAction 32302056.5 2062063 15.6649187 7.005190e-53 ## TypeAnimation 31915318.5 4157723 7.6761525 2.316874e-14 ## TypeComedy 11525837.5 1888686 6.1025705 1.202481e-09 ## TypeDrama 10060557.4 1820528 5.5261750 3.604075e-08 ## TypeDocumentary 333571.7 2881175 0.1157763 9.078389e-01 ## TypeRomance 5207554.9 3713295 1.4024082 1.609149e-01
The last predictor, Short is combined into the intercept term.
# mean(movies1_singletype[movies1_singletype$Animation == 1, 'budget'])
lmfit <- lm(as.formula(paste('budget ~', paste(movietype, collapse=' + '))),
movies1_singletype)
summary(lmfit)$coef
## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 396133.1 1715935 0.2308556 8.174455e-01 ## Action 32302056.5 2062063 15.6649187 7.005190e-53 ## Animation 31915318.5 4157723 7.6761525 2.316874e-14 ## Comedy 11525837.5 1888686 6.1025705 1.202481e-09 ## Drama 10060557.4 1820528 5.5261750 3.604075e-08 ## Documentary 333571.7 2881175 0.1157763 9.078389e-01 ## Romance 5207554.9 3713295 1.4024082 1.609149e-01
What is the association between cost and rating, conditional on type?
movies1_reg_plot <- ggplot(movies1_singletype, aes(x=budget, y=rating, color=Type)) +
geom_point(shape=1) +
# set fullrange=T will extend the fitted line outside the sample range
stat_smooth(method=lm, se=FALSE, fullrange=FALSE, size=1.5) +
# color is the grouping interface, hence scale_color_*
scale_color_discrete(name='Movie Type: # of samples',
labels=paste(levels(movies1_singletype$Type), ': ',
table(movies1_singletype$Type)))
movies1_reg_plot
Draw a scatter plot of Temprature and RH in Taipei, Taichung, Kaoshiung and Hualien in 2013-05, 2014-05 and 2015-05. Furthermore, each location has its own color and regression line.
Hint: Use following code to read all data and slice the data you want.
load("rdata/for_exercise.RData") # load sample data names dat
dat$month <- substr(dat$T.Max.Time, 6, 7)
dat <- dat[dat$month == "05",]
ggplot(dat, aes(x=RH, y=Temperature, color=city)) +
geom_point(shape=1) +
stat_smooth(method = lm, size = 1.5)
interact_terms <- paste(paste(movietype, '*budget', sep=''), collapse=' + ')
lmfit <- lm(as.formula(paste('rating ~', interact_terms)), movies1_singletype)
tail(summary(lmfit)$coef)
## Estimate Std. Error t value Pr(>|t|) ## Action:budget 1.580218e-08 3.331784e-08 0.47428594 0.6353367 ## budget:Animation 8.246542e-09 3.379426e-08 0.24402199 0.8072334 ## budget:Comedy -6.222262e-10 3.337127e-08 -0.01864557 0.9851253 ## budget:Drama 1.295424e-08 3.333295e-08 0.38863176 0.6975810 ## budget:Documentary -8.504501e-08 9.010076e-08 -0.94388788 0.3453164 ## budget:Romance -3.983138e-08 4.086954e-08 -0.97459824 0.3298521
ggplot(data=humidity)+geom_line(aes(x=date,y=humidity))
data
## date location humidity ## 1 2015-05-01 \xaaὬ 91 ## 2 2015-05-02 \xaaὬ 86 ## 3 2015-05-03 \xaaὬ 82 ## 4 2015-05-04 \xaaὬ 78 ## 5 2015-05-05 \xaaὬ 90 ## 6 2015-05-06 \xaaὬ 83
mapping: aes(x=…,y=…)
## date humidity ## 1 2015-05-01 91 ## 2 2015-05-02 86 ## 3 2015-05-03 82 ## 4 2015-05-04 78 ## 5 2015-05-05 90 ## 6 2015-05-06 83
geom_line
ggplot(humidity,aes(x=date,y=humidity))+ geom_line(aes(group=location))
geom_point
ggplot(humidity,aes(x=date,y=humidity))+ geom_point()
ggplot(humidity,aes(x=date,y=humidity))+ geom_line(aes(group=location))
ggplot(humidity,aes(x=date,y=humidity))+ geom_line(aes(group=location))+ scale_x_date(breaks = "2 weeks")
ggplot(pressure,aes(x=temperature,y=pressure))+ geom_point()+ stat_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
hum=filter(humidity,rainfall>0) %>% select(location,rainfall) %>% group_by(location) %>% summarise(ave.rain=mean(rainfall)) ggplot(hum,aes(x=location,y=ave.rain))+geom_bar(aes(fill=location),stat='identity')
ggplot(hum,aes(x=location,y=ave.rain))+ geom_bar(aes(fill=location),stat='identity')+ coord_polar(theta='y')
ggplot(hum,aes(x="",y=ave.rain))+ geom_bar(aes(fill=location),stat='identity')+ coord_polar(theta='y')
ggplot(humidity,aes(x=date,y=humidity))+
geom_line(aes(group=location,colour=location),size=1.5)+
scale_x_date(breaks = "7 days",labels = date_format("%m/%d"))+
facet_wrap(~location)
ggplot(humidity,aes(x=date,y=humidity))+ geom_line(aes(group=location,colour=location),size=1.5)+ theme_light(base_size = 20)
plot(movies1$budget, movies1$rating) # base solution
abline(h=median(movies1$rating), col='red')
text(x=max(movies1$budget)*.9, y=median(movies1$rating),
labels='Median of Rating', col='red', pos=1)
brggp <- ggplot(movies1, aes(x=budget, y=rating)) + geom_point() brggp + geom_hline(yintercept=median(movies1$rating)) # ?geom_abline for general setup # brggp + geom_hline(data=movies1, aes(yintercept=median(rating))) # the same # brggp + geom_hline(aes(yintercept=median(movies1$rating))) # the same
brggp + geom_hline(yintercept=median(movies1$rating), color='red') +
annotate('text', x=Inf, y=median(movies1$rating),
label='Medaion of Rating', color='red', vjust=1.2, hjust=1)
# Don't use geom_text for single annotation to avoid overplotting
shaw <- movies1[grep('Shawshank Redemption', movies1$title, fixed=TRUE),]
brggp + annotate('segment', xend=shaw$budget, yend=shaw$rating, x=Inf, y=-Inf,
arrow=grid::arrow(), color='red') +
annotate('text', label='The Shawshank Redemption?', x=Inf, y=-Inf,
hjust=1.5, vjust=-1, color='red')
yearcount <- aggregate(title ~ year, data=movies, FUN=length)
ggplot(yearcount, aes(x=year, y=title)) + geom_line() +
annotate('rect', xmin=1990, xmax=2000, ymin=-Inf, ymax=Inf, fill='blue', alpha=.25)
gg <- ggplot(movies1_singletype, aes(x=rating, y=..density..)) + geom_bar() gg + facet_grid(Action ~ .) # Plot with grouping variable in different window (Vertical)
# Plot with grouping variable in different window (Horizontal) gg + facet_grid(. ~ Action)
movies1_singletype$modern <- (movies1_singletype$year > 2000) ggplot(movies1_singletype, aes(x=rating, y=..density..)) + geom_bar() + facet_grid(modern ~ Action)
movies1_singletype$rated <- (movies1_singletype$mpaa != '') ggplot(movies1_singletype, aes(x=rating, color=modern)) + geom_line(stat="density") + facet_grid(Type ~ rated)
facet_gridprint(label_value) # the default labeller plugg in facet_grid(..., labeller)
## function (variable, value) ## as.character(value) ## <environment: namespace:ggplot2>
mylabeller <- function(variable, value){
if ( variable=='rated' )
value <- ifelse(value == TRUE, 'Rated Movies', 'Unrated Movies')
else if ( variable=='Type' )
as.character(value)
}
ggplot(movies1_singletype, aes(x=rating, color=modern)) + geom_line(stat="density") + facet_grid(Type ~ rated, labeller=mylabeller)
gridExtra (1/3)library(gridExtra)
drawPoint <- function(i) {
ggplot(data.frame(x=1, y=1), aes(x=x,y=y)) +
geom_point(shape=i, size=5) +
ggtitle(sprintf('shape=%s',i)) +
theme(axis.text.x=element_blank(), axis.text.y=element_blank()) +
xlab(NULL) + ylab(NULL)
}
drawPoint(25)
gridExtra (2/3)symbol_points <- mapply(drawPoint, 1:25, SIMPLIFY=FALSE) symbols <- do.call(arrangeGrob, symbol_points) # plot(symbols)
gridExtra (3/3)